Here, the dataset is downloaded from https://www.kaggle.com/datasets/blastchar/telco-customer-churn?resource=download
This dataset consists of 7043 rows(records) and 21 columns(features). There are 20 independent features and 1 dependent feature("Churn"). The problem statement is, why the customers are moving out(Churn is nothing but leaving out of the business) of the business. We have to give solution to the stakeholder, to solve this problem and to provide a model which predicts customer churn.
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# reading the csv file from the current directory
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
# printing the df variable which holds churn dataset from IBM
df
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | ... | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7590-VHVEG | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 5575-GNVDE | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | ... | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 3668-QPYBK | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 7795-CFOCW | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | ... | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 9237-HQITU | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 6840-RESVB | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | ... | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | 2234-XADUH | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | ... | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | 4801-JZAZL | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | ... | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | 8361-LTMKD | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | ... | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | 3186-AJIEK | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | ... | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 21 columns
# describe gives a small statistical summary about the dataset(df)
df.describe()
| SeniorCitizen | tenure | MonthlyCharges | |
|---|---|---|---|
| count | 7043.000000 | 7043.000000 | 7043.000000 |
| mean | 0.162147 | 32.371149 | 64.761692 |
| std | 0.368612 | 24.559481 | 30.090047 |
| min | 0.000000 | 0.000000 | 18.250000 |
| 25% | 0.000000 | 9.000000 | 35.500000 |
| 50% | 0.000000 | 29.000000 | 70.350000 |
| 75% | 0.000000 | 55.000000 | 89.850000 |
| max | 1.000000 | 72.000000 | 118.750000 |
# dtypes gives the datatypes of the individual features in the dataframe
df.dtypes
customerID object gender object SeniorCitizen int64 Partner object Dependents object tenure int64 PhoneService object MultipleLines object InternetService object OnlineSecurity object OnlineBackup object DeviceProtection object TechSupport object StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object dtype: object
df = df.drop(['customerID'], axis = 1)
df
| gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Female | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | Male | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | Male | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | Male | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | Female | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | Male | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | Female | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | Female | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | Male | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | Male | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 20 columns
df['gender'].isnull().sum()
0
There is no null value in the 'gender' feature.
df['gender'].unique()
array(['Female', 'Male'], dtype=object)
There is a two unique values in the 'gender' feature(1. Female, 2. Male). Let's do one hot encoding for this feature using get_dummies in pandas.
gender = pd.get_dummies(df['gender'], prefix = 'gender', dtype = 'int', drop_first = True)
gender
| gender_Male | |
|---|---|
| 0 | 0 |
| 1 | 1 |
| 2 | 1 |
| 3 | 1 |
| 4 | 0 |
| ... | ... |
| 7038 | 1 |
| 7039 | 0 |
| 7040 | 0 |
| 7041 | 1 |
| 7042 | 1 |
7043 rows × 1 columns
Removing the gender column from original dataframe.
df = df.drop(['gender'], axis = 1)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No |
| 1 | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No |
| 2 | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 3 | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No |
| 4 | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No |
| 7039 | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No |
| 7040 | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No |
| 7041 | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
| 7042 | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No |
7043 rows × 19 columns
Now, we use concat from pandas to join 'df' and 'gender'.
df = pd.concat((df, gender), axis = 1)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | Yes | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | Yes | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | Yes | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | Yes | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | No | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['Partner'].isnull().sum()
0
df['Partner'].unique()
array(['Yes', 'No'], dtype=object)
There are only two unique values('Yes' and 'No'). Let's do a label encoding. It means that for 'Yes', we have replace it with 1 and for 'No' we have to replace it with 0.
le = preprocessing.LabelEncoder()
df['Partner'] = le.fit_transform(df['Partner'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | 0 | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | 0 | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | 0 | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | 0 | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | Yes | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | 1 | Yes | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | 1 | Yes | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | 1 | No | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | 0 | No | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['Dependents'].isnull().sum()
0
df['Dependents'].unique()
array(['No', 'Yes'], dtype=object)
There are only two unique values('Yes' and 'No'). Let's do a label encoding. It means that for 'Yes', we have replace it with 1 and for 'No' we have to replace it with 0.
df['Dependents'] = le.fit_transform(df['Dependents'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | 0 | 0 | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | 0 | 0 | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | 0 | 0 | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | 0 | 0 | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | Yes | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | 1 | 1 | 72 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | 1 | 1 | 11 | No | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | 1 | 0 | 4 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | 0 | 0 | 66 | Yes | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['PhoneService'].isnull().sum()
0
df['PhoneService'].unique()
array(['No', 'Yes'], dtype=object)
There are only two unique values('Yes' and 'No'). Let's do a label encoding. It means that for 'Yes', we have replace it with 1 and for 'No' we have to replace it with 0.
df['PhoneService'] = le.fit_transform(df['PhoneService'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | 0 | 0 | 2 | 1 | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | No phone service | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['MultipleLines'].isnull().sum()
0
df['MultipleLines'].unique()
array(['No phone service', 'No', 'Yes'], dtype=object)
df.loc[df['MultipleLines'] == "No phone service", "MultipleLines"] = "No"
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | No | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | No | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | 0 | 0 | 2 | 1 | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | Yes | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | No | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | No | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['MultipleLines'].unique()
array(['No', 'Yes'], dtype=object)
We change that 'No phone service' to 'No'. Now, we can perform Label Encoding using the object 'le'.
df['MultipleLines'] = le.fit_transform(df['MultipleLines'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | DSL | Yes | No | Yes | Yes | Yes | Yes | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | Fiber optic | No | Yes | Yes | No | Yes | Yes | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | DSL | Yes | No | No | No | No | No | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | Fiber optic | Yes | No | Yes | Yes | Yes | Yes | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 |
7043 rows × 20 columns
df['InternetService'].isnull().sum()
0
df['InternetService'].unique()
array(['DSL', 'Fiber optic', 'No'], dtype=object)
Let's perform one-hot encoding for the feature 'InternetService'
internet_service = pd.get_dummies(df['InternetService'], prefix = 'InternetService', dtype = 'int')
Dropping the feature 'InternetService' and including encoded features from 'internet_service'
df = df.drop(['InternetService'], axis = 1)
df = pd.concat((df, internet_service), axis = 1)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | No | Yes | No | No | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | Yes | No | Yes | No | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | Yes | Yes | No | No | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | Yes | No | Yes | Yes | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | No | No | No | No | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | Yes | No | Yes | Yes | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | No | Yes | Yes | No | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | Yes | No | No | No | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | No | No | No | No | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | Yes | No | Yes | Yes | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df['OnlineSecurity'].isnull().sum()
0
df['OnlineSecurity'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
df.loc[df['OnlineSecurity'] == "No internet service", "OnlineSecurity"] = "No"
We change that 'No internet service' to 'No'. Now, we can perform Label Encoding using the object 'le'.
df['OnlineSecurity'].unique()
array(['No', 'Yes'], dtype=object)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | No | Yes | No | No | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | Yes | No | Yes | No | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | Yes | Yes | No | No | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | Yes | No | Yes | Yes | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | No | No | No | No | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | Yes | No | Yes | Yes | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | No | Yes | Yes | No | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | Yes | No | No | No | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | No | No | No | No | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | Yes | No | Yes | Yes | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df['OnlineSecurity'] = le.fit_transform(df['OnlineSecurity'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | Yes | No | No | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | No | Yes | No | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | Yes | No | No | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | No | Yes | Yes | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | No | No | No | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | No | Yes | Yes | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | Yes | Yes | No | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | No | No | No | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | No | No | No | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | No | Yes | Yes | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df['OnlineBackup'].isnull().sum()
0
df['OnlineBackup'].unique()
array(['Yes', 'No', 'No internet service'], dtype=object)
df.loc[df['OnlineBackup'] == "No internet service", "OnlineBackup"] = "No"
df['OnlineBackup'].unique()
array(['Yes', 'No'], dtype=object)
df['OnlineBackup'] = le.fit_transform(df['OnlineBackup'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | No | No | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | Yes | No | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | No | No | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | Yes | Yes | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | No | No | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | Yes | Yes | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | Yes | No | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | No | No | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | No | No | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | Yes | Yes | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df['DeviceProtection'].isnull().sum()
0
df['DeviceProtection'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
df.loc[df['DeviceProtection'] == "No internet service", "DeviceProtection"] = "No"
df['DeviceProtection'].unique()
array(['No', 'Yes'], dtype=object)
df['DeviceProtection'] = le.fit_transform(df['DeviceProtection'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | No | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | No | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | No | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | Yes | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | No | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | Yes | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | No | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | No | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | No | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | Yes | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df['TechSupport'].isnull().sum()
0
df['TechSupport'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
df.loc[df['TechSupport'] == "No internet service", "TechSupport"] = "No"
df['TechSupport'].unique()
array(['No', 'Yes'], dtype=object)
df['TechSupport'] = le.fit_transform(df['TechSupport'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | ... | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | ... | One year | No | Mailed check | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | ... | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | ... | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | ... | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | 1 | ... | One year | Yes | Mailed check | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | 0 | ... | One year | Yes | Credit card (automatic) | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | 0 | ... | Month-to-month | Yes | Electronic check | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | ... | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | 1 | ... | Two year | Yes | Bank transfer (automatic) | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 |
7043 rows × 22 columns
df.dtypes
SeniorCitizen int64 Partner int32 Dependents int32 tenure int64 PhoneService int32 MultipleLines int32 OnlineSecurity int32 OnlineBackup int32 DeviceProtection int32 TechSupport int32 StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object gender_Male int32 InternetService_DSL int32 InternetService_Fiber optic int32 InternetService_No int32 dtype: object
df['StreamingTV'].isnull().sum()
0
df['StreamingTV'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
df.loc[df['StreamingTV'] == "No internet service", "StreamingTV"] = "No"
df['StreamingTV'].unique()
array(['No', 'Yes'], dtype=object)
df['StreamingTV'] = le.fit_transform(df['StreamingTV'])
df['StreamingTV']
0 0
1 0
2 0
3 0
4 0
..
7038 1
7039 1
7040 0
7041 0
7042 1
Name: StreamingTV, Length: 7043, dtype: int32
df['StreamingMovies'].isnull().sum()
0
df['StreamingMovies'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
df.loc[df['StreamingMovies'] == "No internet service", "StreamingMovies"] = "No"
df['StreamingMovies'].unique()
array(['No', 'Yes'], dtype=object)
df['StreamingMovies'] = le.fit_transform(df['StreamingMovies'])
df['StreamingMovies']
0 0
1 0
2 0
3 0
4 0
..
7038 1
7039 1
7040 0
7041 0
7042 1
Name: StreamingMovies, Length: 7043, dtype: int32
df['Contract'].isnull().sum()
0
df['Contract'].unique()
array(['Month-to-month', 'One year', 'Two year'], dtype=object)
We've to perform one hot encoding here...
contract = pd.get_dummies(df['Contract'], prefix = 'Contract', dtype = 'int' )
contract
| Contract_Month-to-month | Contract_One year | Contract_Two year | |
|---|---|---|---|
| 0 | 1 | 0 | 0 |
| 1 | 0 | 1 | 0 |
| 2 | 1 | 0 | 0 |
| 3 | 0 | 1 | 0 |
| 4 | 1 | 0 | 0 |
| ... | ... | ... | ... |
| 7038 | 0 | 1 | 0 |
| 7039 | 0 | 1 | 0 |
| 7040 | 1 | 0 | 0 |
| 7041 | 1 | 0 | 0 |
| 7042 | 0 | 0 | 1 |
7043 rows × 3 columns
df = df.drop(['Contract'], axis = 1)
df = pd.concat((df, contract), axis = 1)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 29.85 | 29.85 | No | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | ... | 56.95 | 1889.5 | No | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 53.85 | 108.15 | Yes | 1 | 1 | 0 | 0 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | ... | 42.30 | 1840.75 | No | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 70.70 | 151.65 | Yes | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | 1 | ... | 84.80 | 1990.5 | No | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | 0 | ... | 103.20 | 7362.9 | No | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 29.60 | 346.45 | No | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 74.40 | 306.6 | Yes | 1 | 0 | 1 | 0 | 1 | 0 | 0 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | 1 | ... | 105.65 | 6844.5 | No | 1 | 0 | 1 | 0 | 0 | 0 | 1 |
7043 rows × 24 columns
df['PaperlessBilling'].isnull().sum()
0
df['PaperlessBilling'].unique()
array(['Yes', 'No'], dtype=object)
df['PaperlessBilling'] = le.fit_transform(df['PaperlessBilling'])
df['PaperlessBilling']
0 1
1 0
2 1
3 0
4 1
..
7038 1
7039 1
7040 1
7041 1
7042 1
Name: PaperlessBilling, Length: 7043, dtype: int32
df['PaymentMethod'].isnull().sum()
0
df['PaymentMethod'].unique()
array(['Electronic check', 'Mailed check', 'Bank transfer (automatic)',
'Credit card (automatic)'], dtype=object)
PaymentMethod = pd.get_dummies(df['PaymentMethod'], prefix = 'PaymentMethod', dtype = 'int')
PaymentMethod
| PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 1 |
| 3 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... |
| 7038 | 0 | 0 | 0 | 1 |
| 7039 | 0 | 1 | 0 | 0 |
| 7040 | 0 | 0 | 1 | 0 |
| 7041 | 0 | 0 | 0 | 1 |
| 7042 | 1 | 0 | 0 | 0 |
7043 rows × 4 columns
df = df.drop(['PaymentMethod'], axis = 1)
df = pd.concat((df, PaymentMethod), axis = 1)
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | ... | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | 1 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | 0 | ... | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
7043 rows × 27 columns
df.dtypes
SeniorCitizen int64 Partner int32 Dependents int32 tenure int64 PhoneService int32 MultipleLines int32 OnlineSecurity int32 OnlineBackup int32 DeviceProtection int32 TechSupport int32 StreamingTV int32 StreamingMovies int32 PaperlessBilling int32 MonthlyCharges float64 TotalCharges object Churn object gender_Male int32 InternetService_DSL int32 InternetService_Fiber optic int32 InternetService_No int32 Contract_Month-to-month int32 Contract_One year int32 Contract_Two year int32 PaymentMethod_Bank transfer (automatic) int32 PaymentMethod_Credit card (automatic) int32 PaymentMethod_Electronic check int32 PaymentMethod_Mailed check int32 dtype: object
df['TotalCharges'].isnull().sum()
0
df['TotalCharges']
0 29.85
1 1889.5
2 108.15
3 1840.75
4 151.65
...
7038 1990.5
7039 7362.9
7040 346.45
7041 306.6
7042 6844.5
Name: TotalCharges, Length: 7043, dtype: object
From the above output, we see that all are numbers(float). But, dtype tells it is an object. So, we have to convert from object type to float.
#df['TotalCharges'] = pd.to_numeric(df['TotalCharges'])
df['TotalCharges'][488]
' '
df.iloc[488]
SeniorCitizen 0 Partner 1 Dependents 1 tenure 0 PhoneService 0 MultipleLines 0 OnlineSecurity 1 OnlineBackup 0 DeviceProtection 1 TechSupport 1 StreamingTV 1 StreamingMovies 0 PaperlessBilling 1 MonthlyCharges 52.55 TotalCharges Churn No gender_Male 0 InternetService_DSL 1 InternetService_Fiber optic 0 InternetService_No 0 Contract_Month-to-month 0 Contract_One year 0 Contract_Two year 1 PaymentMethod_Bank transfer (automatic) 1 PaymentMethod_Credit card (automatic) 0 PaymentMethod_Electronic check 0 PaymentMethod_Mailed check 0 Name: 488, dtype: object
pd.set_option('display.max_columns', None)
df.where(df['tenure'] == 0).dropna()
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 488 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 52.55 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | |
| 753 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.25 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 936 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 80.85 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 1082 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 25.75 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 1340 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 56.05 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | |
| 3331 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.85 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 3826 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 25.35 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 4380 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.00 | No | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 5218 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 19.70 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 6670 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 73.35 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | |
| 6754 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 61.90 | No | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 |
to_change = df.where(df['TotalCharges'] == ' ').dropna()
to_change.index
Index([488, 753, 936, 1082, 1340, 3331, 3826, 4380, 5218, 6670, 6754], dtype='int64')
for x in to_change.index:
df.loc[df['TotalCharges'] == " ", "TotalCharges"] = df.iloc[x]['MonthlyCharges']
df.where(df['tenure'] == 0).dropna()
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 488 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 52.55 | 52.55 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 753 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.25 | 52.55 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 936 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 80.85 | 52.55 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 1082 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 25.75 | 52.55 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 1340 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 56.05 | 52.55 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 |
| 3331 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 19.85 | 52.55 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 3826 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 25.35 | 52.55 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 4380 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.00 | 52.55 | No | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 5218 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 19.70 | 52.55 | No | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 6670 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 73.35 | 52.55 | No | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 6754 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 61.90 | 52.55 | No | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 |
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'])
df.dtypes
SeniorCitizen int64 Partner int32 Dependents int32 tenure int64 PhoneService int32 MultipleLines int32 OnlineSecurity int32 OnlineBackup int32 DeviceProtection int32 TechSupport int32 StreamingTV int32 StreamingMovies int32 PaperlessBilling int32 MonthlyCharges float64 TotalCharges float64 Churn object gender_Male int32 InternetService_DSL int32 InternetService_Fiber optic int32 InternetService_No int32 Contract_Month-to-month int32 Contract_One year int32 Contract_Two year int32 PaymentMethod_Bank transfer (automatic) int32 PaymentMethod_Credit card (automatic) int32 PaymentMethod_Electronic check int32 PaymentMethod_Mailed check int32 dtype: object
df['Churn'].isnull().sum()
0
df['Churn'].unique()
array(['No', 'Yes'], dtype=object)
df['Churn'] = le.fit_transform(df['Churn'])
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 29.85 | 29.85 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 56.95 | 1889.50 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 53.85 | 108.15 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 42.30 | 1840.75 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 70.70 | 151.65 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 84.80 | 1990.50 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 103.20 | 7362.90 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 29.60 | 346.45 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 74.40 | 306.60 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 105.65 | 6844.50 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
7043 rows × 27 columns
df.dtypes
SeniorCitizen int64 Partner int32 Dependents int32 tenure int64 PhoneService int32 MultipleLines int32 OnlineSecurity int32 OnlineBackup int32 DeviceProtection int32 TechSupport int32 StreamingTV int32 StreamingMovies int32 PaperlessBilling int32 MonthlyCharges float64 TotalCharges float64 Churn int32 gender_Male int32 InternetService_DSL int32 InternetService_Fiber optic int32 InternetService_No int32 Contract_Month-to-month int32 Contract_One year int32 Contract_Two year int32 PaymentMethod_Bank transfer (automatic) int32 PaymentMethod_Credit card (automatic) int32 PaymentMethod_Electronic check int32 PaymentMethod_Mailed check int32 dtype: object
temp_df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
temp_df.dtypes
customerID object gender object SeniorCitizen int64 Partner object Dependents object tenure int64 PhoneService object MultipleLines object InternetService object OnlineSecurity object OnlineBackup object DeviceProtection object TechSupport object StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object dtype: object
import plotly.express as plx
fig = plx.bar(temp_df, x = 'gender', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.histogram(temp_df, x = 'SeniorCitizen', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.histogram(temp_df, x = 'Partner', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.histogram(temp_df, x = 'Dependents', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.scatter(temp_df, y = 'tenure', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
This scatter plot is confusing a lot. So, we're going to separe the tenure value based on churn value.
tenure_churn_yes, tenure_churn_no = [], []
for i in range(len(df)):
tenure = temp_df.iloc[i]['tenure']
churn = temp_df.iloc[i]['Churn']
if churn == 'Yes':
tenure_churn_yes.append(tenure)
else:
tenure_churn_no.append(tenure)
fig = plx.scatter(y = tenure_churn_yes, title = 'Churn with tenure')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.scatter(y = tenure_churn_no, title = 'No Churn with Tenure')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.histogram(temp_df, x = 'PhoneService', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
temp_df.dtypes
customerID object gender object SeniorCitizen int64 Partner object Dependents object tenure int64 PhoneService object MultipleLines object InternetService object OnlineSecurity object OnlineBackup object DeviceProtection object TechSupport object StreamingTV object StreamingMovies object Contract object PaperlessBilling object PaymentMethod object MonthlyCharges float64 TotalCharges object Churn object dtype: object
temp_df['MultipleLines'].unique()
array(['No phone service', 'No', 'Yes'], dtype=object)
temp_df.loc[temp_df['MultipleLines'] == 'No phone service', 'MultipleLines'] = 'No'
temp_df['MultipleLines'].unique()
array(['No', 'Yes'], dtype=object)
fig = plx.histogram(temp_df, x = 'MultipleLines', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
temp_df['InternetService'].unique()
array(['DSL', 'Fiber optic', 'No'], dtype=object)
fig = plx.histogram(temp_df, x = 'InternetService', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
temp_df['OnlineSecurity'].unique()
array(['No', 'Yes', 'No internet service'], dtype=object)
temp_df.loc[temp_df['OnlineSecurity'] == 'No internet service', 'OnlineSecurity'] = 'No'
temp_df['OnlineSecurity'].unique()
array(['No', 'Yes'], dtype=object)
fig = plx.histogram(temp_df, x = 'OnlineSecurity', color = 'Churn')
fig.update_traces(dict(marker_line_width=0))
fig.show()
fig = plx.scatter(temp_df, x = 'MonthlyCharges', color = 'Churn')
fig.show()
fig = plx.scatter(temp_df, x = 'TotalCharges', color = 'Churn')
fig.show()
plx.imshow(df.corr(), text_auto = True, height = 1750, width = 1750)
temp_df['Churn'].unique()
array(['No', 'Yes'], dtype=object)
churn_yes_df = temp_df.where((temp_df['Churn'] == 'Yes')).dropna()
churn_yes_df
| customerID | gender | SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | InternetService | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | Contract | PaperlessBilling | PaymentMethod | MonthlyCharges | TotalCharges | Churn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 3668-QPYBK | Male | 0.0 | No | No | 2.0 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Yes |
| 4 | 9237-HQITU | Female | 0.0 | No | No | 2.0 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Yes |
| 5 | 9305-CDSKC | Female | 0.0 | No | No | 8.0 | Yes | Yes | Fiber optic | No | No | Yes | No | Yes | Yes | Month-to-month | Yes | Electronic check | 99.65 | 820.5 | Yes |
| 8 | 7892-POOKP | Female | 0.0 | Yes | No | 28.0 | Yes | Yes | Fiber optic | No | No | Yes | Yes | Yes | Yes | Month-to-month | Yes | Electronic check | 104.80 | 3046.05 | Yes |
| 13 | 0280-XJGEX | Male | 0.0 | No | No | 49.0 | Yes | Yes | Fiber optic | No | Yes | Yes | No | Yes | Yes | Month-to-month | Yes | Bank transfer (automatic) | 103.70 | 5036.3 | Yes |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7021 | 1699-HPSBG | Male | 0.0 | No | No | 12.0 | Yes | No | DSL | No | No | No | Yes | Yes | No | One year | Yes | Electronic check | 59.80 | 727.8 | Yes |
| 7026 | 8775-CEBBJ | Female | 0.0 | No | No | 9.0 | Yes | No | DSL | No | No | No | No | No | No | Month-to-month | Yes | Bank transfer (automatic) | 44.20 | 403.35 | Yes |
| 7032 | 6894-LFHLY | Male | 1.0 | No | No | 1.0 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 75.75 | 75.75 | Yes |
| 7034 | 0639-TSIQW | Female | 0.0 | No | No | 67.0 | Yes | Yes | Fiber optic | Yes | Yes | Yes | No | Yes | No | Month-to-month | Yes | Credit card (automatic) | 102.95 | 6886.25 | Yes |
| 7041 | 8361-LTMKD | Male | 1.0 | Yes | No | 4.0 | Yes | Yes | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Mailed check | 74.40 | 306.6 | Yes |
1869 rows × 21 columns
Thereare 1869 churn customers...
churn_yes_df['Dependents'].value_counts()
Dependents No 1543 Yes 326 Name: count, dtype: int64
fig = plx.histogram(churn_yes_df, x = 'Dependents', title = "Churn on Dependents")
fig.update_traces(dict(marker_line_width=0))
fig.show()
churn_yes_df['tenure'].value_counts()
tenure
1.0 380
2.0 123
3.0 94
4.0 83
5.0 64
...
60.0 6
72.0 6
62.0 5
64.0 4
63.0 4
Name: count, Length: 72, dtype: int64
plx.histogram(x = churn_yes_df['tenure'])
churn_yes_df['PhoneService'].value_counts()
PhoneService Yes 1699 No 170 Name: count, dtype: int64
fig = plx.histogram(churn_yes_df, x = 'PhoneService', title = "Churn on PhoneService")
fig.update_traces(dict(marker_line_width=0))
fig.show()
churn_yes_df['InternetService'].value_counts()
InternetService Fiber optic 1297 DSL 459 No 113 Name: count, dtype: int64
fig = plx.histogram(churn_yes_df, x = 'InternetService', title = "Churn on InternetService")
fig.update_traces(dict(marker_line_width=0))
fig.show()
churn_yes_df['Contract'].value_counts()
Contract Month-to-month 1655 One year 166 Two year 48 Name: count, dtype: int64
fig = plx.histogram(churn_yes_df, x = 'Contract', title = "Churn on Contract")
fig.update_traces(dict(marker_line_width=0))
fig.show()
churn_yes_df['PaymentMethod'].value_counts()
PaymentMethod Electronic check 1071 Mailed check 308 Bank transfer (automatic) 258 Credit card (automatic) 232 Name: count, dtype: int64
fig = plx.histogram(churn_yes_df, x = 'PaymentMethod', title = "PaymentMethod")
fig.update_traces(dict(marker_line_width=0))
fig.show()
df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 29.85 | 29.85 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 56.95 | 1889.50 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 53.85 | 108.15 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 42.30 | 1840.75 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 70.70 | 151.65 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0 | 1 | 1 | 24 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 84.80 | 1990.50 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 7039 | 0 | 1 | 1 | 72 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 103.20 | 7362.90 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| 7040 | 0 | 1 | 1 | 11 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 29.60 | 346.45 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 7041 | 1 | 1 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 74.40 | 306.60 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 7042 | 0 | 0 | 0 | 66 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 105.65 | 6844.50 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 |
7043 rows × 27 columns
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
scaled_df = mms.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df, columns = df.columns)
scaled_df
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 1.0 | 0.0 | 0.013889 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.115423 | 0.001275 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.0 | 0.472222 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.385075 | 0.215867 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.027778 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.354229 | 0.010310 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.625000 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.239303 | 0.210241 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.027778 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.521891 | 0.015330 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 0.0 | 1.0 | 1.0 | 0.333333 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.662189 | 0.227521 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 7039 | 0.0 | 1.0 | 1.0 | 1.000000 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.845274 | 0.847461 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 |
| 7040 | 0.0 | 1.0 | 1.0 | 0.152778 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.112935 | 0.037809 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 7041 | 1.0 | 1.0 | 0.0 | 0.055556 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.558706 | 0.033210 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 7042 | 0.0 | 0.0 | 0.0 | 0.916667 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.869652 | 0.787641 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 |
7043 rows × 27 columns
Now, everything sacled in the range of 0 to 1.
from keras.layers import Dense,Dropout
from keras import Sequential
ANN_model = Sequential()
# Adding Input Layer to ANN
ANN_model.add(Dense(units = 27, activation = 'relu'))
# Adding 1st Hidden Layer to the ANN
ANN_model.add(Dense(units = 15, activation = 'relu'))
ANN_model.add(Dropout(0.4))
# Adding 2nd Hidden Layer to the ANN
ANN_model.add(Dense(units = 7, activation = 'relu'))
ANN_model.add(Dropout(0.3))
# Adding Output Layer to the ANN
ANN_model.add(Dense(units = 1, activation = 'sigmoid'))
ANN_model.compile(optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
x_train, x_test, y_train, y_test = train_test_split( df.drop(['Churn'], axis = 1), df['Churn'], test_size = 0.2, random_state = 35)
import tensorflow as tf
early_stopping = tf.keras.callbacks.EarlyStopping(
monitor="accuracy",
min_delta=0.0001,
patience=10,
verbose=1,
mode="auto",
baseline=None,
restore_best_weights=False
)
model_history = ANN_model.fit(x_train, y_train, batch_size = 10, epochs = 40, validation_split = 0.33, callbacks = early_stopping )
Epoch 1/40 378/378 [==============================] - 3s 5ms/step - loss: 30.6997 - accuracy: 0.6017 - val_loss: 0.6224 - val_accuracy: 0.7511 Epoch 2/40 378/378 [==============================] - 2s 4ms/step - loss: 0.6164 - accuracy: 0.7509 - val_loss: 0.5665 - val_accuracy: 0.7597 Epoch 3/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5836 - accuracy: 0.7459 - val_loss: 0.5603 - val_accuracy: 0.7527 Epoch 4/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5691 - accuracy: 0.7480 - val_loss: 0.5598 - val_accuracy: 0.7462 Epoch 5/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5606 - accuracy: 0.7509 - val_loss: 0.5419 - val_accuracy: 0.7608 Epoch 6/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5655 - accuracy: 0.7459 - val_loss: 0.5518 - val_accuracy: 0.7527 Epoch 7/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5602 - accuracy: 0.7533 - val_loss: 0.5474 - val_accuracy: 0.7532 Epoch 8/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5630 - accuracy: 0.7509 - val_loss: 0.5282 - val_accuracy: 0.7672 Epoch 9/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5728 - accuracy: 0.7456 - val_loss: 0.5604 - val_accuracy: 0.7468 Epoch 10/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5674 - accuracy: 0.7435 - val_loss: 0.5613 - val_accuracy: 0.7452 Epoch 11/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5646 - accuracy: 0.7443 - val_loss: 0.5588 - val_accuracy: 0.7478 Epoch 12/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5643 - accuracy: 0.7440 - val_loss: 0.5456 - val_accuracy: 0.7543 Epoch 13/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5735 - accuracy: 0.7403 - val_loss: 0.5574 - val_accuracy: 0.7457 Epoch 14/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5646 - accuracy: 0.7472 - val_loss: 0.5627 - val_accuracy: 0.7478 Epoch 15/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5641 - accuracy: 0.7451 - val_loss: 0.5572 - val_accuracy: 0.7473 Epoch 16/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5660 - accuracy: 0.7462 - val_loss: 0.5556 - val_accuracy: 0.7478 Epoch 17/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5663 - accuracy: 0.7417 - val_loss: 0.5583 - val_accuracy: 0.7489 Epoch 17: early stopping
model_history.history['val_accuracy']
[0.751075267791748, 0.7596774101257324, 0.7526881694793701, 0.7462365627288818, 0.7607526779174805, 0.7526881694793701, 0.7532258033752441, 0.7672042846679688, 0.7467741966247559, 0.7451612949371338, 0.7478494644165039, 0.7543010711669922, 0.7456989288330078, 0.7478494644165039, 0.7473118305206299, 0.7478494644165039, 0.748924732208252]
model_history.history['accuracy']
[0.6017488241195679, 0.7509273886680603, 0.7458929419517517, 0.7480127215385437, 0.7509273886680603, 0.7458929419517517, 0.7533121109008789, 0.7509273886680603, 0.7456279993057251, 0.7435082197189331, 0.7443031072616577, 0.7440381646156311, 0.7403285503387451, 0.7472178339958191, 0.7450980544090271, 0.7461579442024231, 0.7416534423828125]
model_history.history.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
import plotly.graph_objects as go
fig_1 = go.Figure()
fig_1.add_trace(go.Scatter(x =np.arange(0,len(model_history.history['accuracy'])),
y = model_history.history['val_accuracy'],
mode='lines+markers',
name='val_accuracy'))
fig_1.add_trace(go.Scatter(x =np.arange(0,len(model_history.history['accuracy'])),
y = model_history.history['accuracy'],
mode='lines+markers',
name='Accuracy'))
fig_1.update_layout(title = 'ACCURACY vs VALIDATION_ACCURACY')
fig_1.update_xaxes(title_text="Epochs")
fig_1.update_yaxes(title_text="Accuracy")
fig_1.show()
model_history.history['loss']
[30.699697494506836, 0.6163778901100159, 0.5835999250411987, 0.5690638422966003, 0.5605646371841431, 0.5655323266983032, 0.5602178573608398, 0.5630198121070862, 0.5727599859237671, 0.56740403175354, 0.5646049380302429, 0.5643322467803955, 0.5735149383544922, 0.5646213889122009, 0.5640552043914795, 0.5659602880477905, 0.5663062930107117]
model_history.history['val_loss']
[0.6224174499511719, 0.566506028175354, 0.5603281259536743, 0.5597817301750183, 0.541858434677124, 0.5517999529838562, 0.5473867654800415, 0.5281704068183899, 0.5603865385055542, 0.5613142251968384, 0.558760404586792, 0.5455719232559204, 0.5573979616165161, 0.5626907348632812, 0.5572342276573181, 0.555553674697876, 0.5582592487335205]
fig_2 = go.Figure()
fig_2.add_trace(go.Scatter(x =np.arange(0,len(model_history.history['loss'])),
y = model_history.history['loss'],
mode='lines+markers',
name='loss'))
fig_2.add_trace(go.Scatter(x =np.arange(0,len(model_history.history['loss'])),
y = model_history.history['val_loss'],
mode='lines+markers',
name='val_loss'))
fig_2.update_layout(title = 'LOSS vs VALIDATION_LOSS')
fig_2.update_xaxes(title_text="Epochs")
fig_2.update_yaxes(title_text="Loss")
fig_2.show()
ANN_model.evaluate(x_test, y_test)
45/45 [==============================] - 0s 3ms/step - loss: 0.5267 - accuracy: 0.7786
[0.5266959071159363, 0.7785663604736328]
from sklearn.metrics import confusion_matrix, classification_report
predict = ANN_model.predict(x_test)
45/45 [==============================] - 0s 1ms/step
predict
array([[0.25292522],
[0.25292522],
[0.25292522],
...,
[0.38886058],
[0.25292522],
[0.25292522]], dtype=float32)
predict_new = []
for x in predict:
if x >= 0.5:
predict_new.append(1)
else:
predict_new.append(0)
predict_new[-10 : ]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
plx.imshow(confusion_matrix( y_test, predict_new), text_auto = True)
print(classification_report(y_test, predict_new))
precision recall f1-score support
0 0.77 1.00 0.87 1062
1 0.91 0.11 0.20 347
accuracy 0.78 1409
macro avg 0.84 0.55 0.54 1409
weighted avg 0.81 0.78 0.71 1409
plx.imshow(df.corr(), height = 1700, width = 1700, text_auto = True)
corr = df.corr()
df.corr()
| SeniorCitizen | Partner | Dependents | tenure | PhoneService | MultipleLines | OnlineSecurity | OnlineBackup | DeviceProtection | TechSupport | StreamingTV | StreamingMovies | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | gender_Male | InternetService_DSL | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_One year | Contract_Two year | PaymentMethod_Bank transfer (automatic) | PaymentMethod_Credit card (automatic) | PaymentMethod_Electronic check | PaymentMethod_Mailed check | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SeniorCitizen | 1.000000 | 0.016479 | -0.211185 | 0.016567 | 0.008576 | 0.142948 | -0.038653 | 0.066572 | 0.059428 | -0.060625 | 0.105378 | 0.120176 | 0.156530 | 0.220173 | 0.102994 | 0.150889 | -0.001874 | -0.108322 | 0.255338 | -0.182742 | 0.138360 | -0.046262 | -0.117000 | -0.016159 | -0.024135 | 0.171718 | -0.153477 |
| Partner | 0.016479 | 1.000000 | 0.452676 | 0.379697 | 0.017706 | 0.142057 | 0.143106 | 0.141498 | 0.153786 | 0.119999 | 0.124666 | 0.117412 | -0.014877 | 0.096848 | 0.317540 | -0.150448 | -0.001808 | -0.000851 | 0.000304 | 0.000615 | -0.280865 | 0.082783 | 0.248091 | 0.110706 | 0.082029 | -0.083852 | -0.095125 |
| Dependents | -0.211185 | 0.452676 | 1.000000 | 0.159712 | -0.001762 | -0.024526 | 0.080972 | 0.023671 | 0.013963 | 0.063268 | -0.016558 | -0.039741 | -0.111377 | -0.113890 | 0.062136 | -0.164221 | 0.010517 | 0.052010 | -0.165818 | 0.139812 | -0.231720 | 0.068368 | 0.204613 | 0.052021 | 0.060267 | -0.150642 | 0.059071 |
| tenure | 0.016567 | 0.379697 | 0.159712 | 1.000000 | 0.008448 | 0.331941 | 0.327203 | 0.360277 | 0.360653 | 0.324221 | 0.279756 | 0.286111 | 0.006152 | 0.247900 | 0.826160 | -0.352229 | 0.005106 | 0.013274 | 0.019720 | -0.039062 | -0.645561 | 0.202570 | 0.558533 | 0.243510 | 0.233006 | -0.208363 | -0.233852 |
| PhoneService | 0.008576 | 0.017706 | -0.001762 | 0.008448 | 1.000000 | 0.279690 | -0.092893 | -0.052312 | -0.071227 | -0.096340 | -0.022574 | -0.032959 | 0.016505 | 0.247398 | 0.113207 | 0.011942 | -0.006488 | -0.452425 | 0.289999 | 0.172209 | -0.000742 | -0.002791 | 0.003519 | 0.007556 | -0.007721 | 0.003062 | -0.003319 |
| MultipleLines | 0.142948 | 0.142057 | -0.024526 | 0.331941 | 0.279690 | 1.000000 | 0.098108 | 0.202237 | 0.201137 | 0.100571 | 0.257152 | 0.258751 | 0.163530 | 0.490434 | 0.468516 | 0.040102 | -0.008414 | -0.199920 | 0.366083 | -0.210564 | -0.088203 | -0.003794 | 0.106253 | 0.075527 | 0.060048 | 0.083618 | -0.227206 |
| OnlineSecurity | -0.038653 | 0.143106 | 0.080972 | 0.327203 | -0.092893 | 0.098108 | 1.000000 | 0.283832 | 0.275438 | 0.354931 | 0.176207 | 0.187398 | -0.003636 | 0.296594 | 0.411672 | -0.171226 | -0.017021 | 0.321269 | -0.030696 | -0.333403 | -0.246679 | 0.100162 | 0.191773 | 0.095158 | 0.115721 | -0.112338 | -0.080798 |
| OnlineBackup | 0.066572 | 0.141498 | 0.023671 | 0.360277 | -0.052312 | 0.202237 | 0.283832 | 1.000000 | 0.303546 | 0.294233 | 0.282106 | 0.274501 | 0.126735 | 0.441780 | 0.509246 | -0.082255 | -0.013773 | 0.157884 | 0.165651 | -0.381593 | -0.164172 | 0.083722 | 0.111400 | 0.087004 | 0.090785 | -0.000408 | -0.174164 |
| DeviceProtection | 0.059428 | 0.153786 | 0.013963 | 0.360653 | -0.071227 | 0.201137 | 0.275438 | 0.303546 | 1.000000 | 0.333313 | 0.390874 | 0.402111 | 0.103797 | 0.482692 | 0.522003 | -0.066160 | -0.002105 | 0.146291 | 0.176049 | -0.380754 | -0.225662 | 0.102495 | 0.165096 | 0.083115 | 0.111554 | -0.003351 | -0.187373 |
| TechSupport | -0.060625 | 0.119999 | 0.063268 | 0.324221 | -0.096340 | 0.100571 | 0.354931 | 0.294233 | 0.333313 | 1.000000 | 0.278070 | 0.279358 | 0.037880 | 0.338304 | 0.431904 | -0.164674 | -0.009212 | 0.313118 | -0.020492 | -0.336298 | -0.285241 | 0.095775 | 0.240824 | 0.101252 | 0.117272 | -0.114839 | -0.085509 |
| StreamingTV | 0.105378 | 0.124666 | -0.016558 | 0.279756 | -0.022574 | 0.257152 | 0.176207 | 0.282106 | 0.390874 | 0.278070 | 1.000000 | 0.533094 | 0.223841 | 0.629603 | 0.514990 | 0.063228 | -0.008393 | 0.016274 | 0.329349 | -0.415552 | -0.112282 | 0.061612 | 0.072049 | 0.046252 | 0.040433 | 0.144626 | -0.247742 |
| StreamingMovies | 0.120176 | 0.117412 | -0.039741 | 0.286111 | -0.032959 | 0.258751 | 0.187398 | 0.274501 | 0.402111 | 0.279358 | 0.533094 | 1.000000 | 0.211716 | 0.627429 | 0.520118 | 0.061382 | -0.010487 | 0.025698 | 0.322923 | -0.418675 | -0.116633 | 0.064926 | 0.073960 | 0.048652 | 0.048575 | 0.137966 | -0.250595 |
| PaperlessBilling | 0.156530 | -0.014877 | -0.111377 | 0.006152 | 0.016505 | 0.163530 | -0.003636 | 0.126735 | 0.103797 | 0.037880 | 0.223841 | 0.211716 | 1.000000 | 0.352150 | 0.158557 | 0.191825 | -0.011754 | -0.063121 | 0.326853 | -0.321013 | 0.169096 | -0.051391 | -0.147889 | -0.016332 | -0.013589 | 0.208865 | -0.205398 |
| MonthlyCharges | 0.220173 | 0.096848 | -0.113890 | 0.247900 | 0.247398 | 0.490434 | 0.296594 | 0.441780 | 0.482692 | 0.338304 | 0.629603 | 0.627429 | 0.352150 | 1.000000 | 0.651169 | 0.193356 | -0.014569 | -0.160189 | 0.787066 | -0.763557 | 0.060165 | 0.004904 | -0.074681 | 0.042812 | 0.030550 | 0.271625 | -0.377437 |
| TotalCharges | 0.102994 | 0.317540 | 0.062136 | 0.826160 | 0.113207 | 0.468516 | 0.411672 | 0.509246 | 0.522003 | 0.431904 | 0.514990 | 0.520118 | 0.158557 | 0.651169 | 1.000000 | -0.198353 | -0.000077 | -0.052462 | 0.361636 | -0.375207 | -0.444311 | 0.170810 | 0.354550 | 0.185990 | 0.182910 | -0.059274 | -0.295726 |
| Churn | 0.150889 | -0.150448 | -0.164221 | -0.352229 | 0.011942 | 0.040102 | -0.171226 | -0.082255 | -0.066160 | -0.164674 | 0.063228 | 0.061382 | 0.191825 | 0.193356 | -0.198353 | 1.000000 | -0.008612 | -0.124214 | 0.308020 | -0.227890 | 0.405103 | -0.177820 | -0.302253 | -0.117937 | -0.134302 | 0.301919 | -0.091683 |
| gender_Male | -0.001874 | -0.001808 | 0.010517 | 0.005106 | -0.006488 | -0.008414 | -0.017021 | -0.013773 | -0.002105 | -0.009212 | -0.008393 | -0.010487 | -0.011754 | -0.014569 | -0.000077 | -0.008612 | 1.000000 | 0.006568 | -0.011286 | 0.006026 | -0.003386 | 0.008026 | -0.003695 | -0.016024 | 0.001215 | 0.000752 | 0.013744 |
| InternetService_DSL | -0.108322 | -0.000851 | 0.052010 | 0.013274 | -0.452425 | -0.199920 | 0.321269 | 0.157884 | 0.146291 | 0.313118 | 0.016274 | 0.025698 | -0.063121 | -0.160189 | -0.052462 | -0.124214 | 0.006568 | 1.000000 | -0.640987 | -0.380635 | -0.065509 | 0.046795 | 0.031714 | 0.025476 | 0.051438 | -0.104418 | 0.041899 |
| InternetService_Fiber optic | 0.255338 | 0.000304 | -0.165818 | 0.019720 | 0.289999 | 0.366083 | -0.030696 | 0.165651 | 0.176049 | -0.020492 | 0.329349 | 0.322923 | 0.326853 | 0.787066 | 0.361636 | 0.308020 | -0.011286 | -0.640987 | 1.000000 | -0.465793 | 0.244164 | -0.076324 | -0.211526 | -0.022624 | -0.050077 | 0.336410 | -0.306834 |
| InternetService_No | -0.182742 | 0.000615 | 0.139812 | -0.039062 | 0.172209 | -0.210564 | -0.333403 | -0.381593 | -0.380754 | -0.336298 | -0.415552 | -0.418675 | -0.321013 | -0.763557 | -0.375207 | -0.227890 | 0.006026 | -0.380635 | -0.465793 | 1.000000 | -0.218639 | 0.038004 | 0.218278 | -0.002113 | 0.001030 | -0.284917 | 0.321361 |
| Contract_Month-to-month | 0.138360 | -0.280865 | -0.231720 | -0.645561 | -0.000742 | -0.088203 | -0.246679 | -0.164172 | -0.225662 | -0.285241 | -0.112282 | -0.116633 | 0.169096 | 0.060165 | -0.444311 | 0.405103 | -0.003386 | -0.065509 | 0.244164 | -0.218639 | 1.000000 | -0.568744 | -0.622633 | -0.179707 | -0.204145 | 0.331661 | 0.004138 |
| Contract_One year | -0.046262 | 0.082783 | 0.068368 | 0.202570 | -0.002791 | -0.003794 | 0.100162 | 0.083722 | 0.102495 | 0.095775 | 0.061612 | 0.064926 | -0.051391 | 0.004904 | 0.170810 | -0.177820 | 0.008026 | 0.046795 | -0.076324 | 0.038004 | -0.568744 | 1.000000 | -0.289510 | 0.057451 | 0.067589 | -0.109130 | -0.000116 |
| Contract_Two year | -0.117000 | 0.248091 | 0.204613 | 0.558533 | 0.003519 | 0.106253 | 0.191773 | 0.111400 | 0.165096 | 0.240824 | 0.072049 | 0.073960 | -0.147889 | -0.074681 | 0.354550 | -0.302253 | -0.003695 | 0.031714 | -0.211526 | 0.218278 | -0.622633 | -0.289510 | 1.000000 | 0.154471 | 0.173265 | -0.282138 | -0.004705 |
| PaymentMethod_Bank transfer (automatic) | -0.016159 | 0.110706 | 0.052021 | 0.243510 | 0.007556 | 0.075527 | 0.095158 | 0.087004 | 0.083115 | 0.101252 | 0.046252 | 0.048652 | -0.016332 | 0.042812 | 0.185990 | -0.117937 | -0.016024 | 0.025476 | -0.022624 | -0.002113 | -0.179707 | 0.057451 | 0.154471 | 1.000000 | -0.278215 | -0.376762 | -0.288685 |
| PaymentMethod_Credit card (automatic) | -0.024135 | 0.082029 | 0.060267 | 0.233006 | -0.007721 | 0.060048 | 0.115721 | 0.090785 | 0.111554 | 0.117272 | 0.040433 | 0.048575 | -0.013589 | 0.030550 | 0.182910 | -0.134302 | 0.001215 | 0.051438 | -0.050077 | 0.001030 | -0.204145 | 0.067589 | 0.173265 | -0.278215 | 1.000000 | -0.373322 | -0.286049 |
| PaymentMethod_Electronic check | 0.171718 | -0.083852 | -0.150642 | -0.208363 | 0.003062 | 0.083618 | -0.112338 | -0.000408 | -0.003351 | -0.114839 | 0.144626 | 0.137966 | 0.208865 | 0.271625 | -0.059274 | 0.301919 | 0.000752 | -0.104418 | 0.336410 | -0.284917 | 0.331661 | -0.109130 | -0.282138 | -0.376762 | -0.373322 | 1.000000 | -0.387372 |
| PaymentMethod_Mailed check | -0.153477 | -0.095125 | 0.059071 | -0.233852 | -0.003319 | -0.227206 | -0.080798 | -0.174164 | -0.187373 | -0.085509 | -0.247742 | -0.250595 | -0.205398 | -0.377437 | -0.295726 | -0.091683 | 0.013744 | 0.041899 | -0.306834 | 0.321361 | 0.004138 | -0.000116 | -0.004705 | -0.288685 | -0.286049 | -0.387372 | 1.000000 |
index = corr['Churn'].index
values = corr['Churn'].values
values = [abs(x) for x in values]
sort_df = pd.DataFrame()
sort_df['index'] , sort_df['values'] = list(index), list(values)
sort_df
| index | values | |
|---|---|---|
| 0 | SeniorCitizen | 0.150889 |
| 1 | Partner | 0.150448 |
| 2 | Dependents | 0.164221 |
| 3 | tenure | 0.352229 |
| 4 | PhoneService | 0.011942 |
| 5 | MultipleLines | 0.040102 |
| 6 | OnlineSecurity | 0.171226 |
| 7 | OnlineBackup | 0.082255 |
| 8 | DeviceProtection | 0.066160 |
| 9 | TechSupport | 0.164674 |
| 10 | StreamingTV | 0.063228 |
| 11 | StreamingMovies | 0.061382 |
| 12 | PaperlessBilling | 0.191825 |
| 13 | MonthlyCharges | 0.193356 |
| 14 | TotalCharges | 0.198353 |
| 15 | Churn | 1.000000 |
| 16 | gender_Male | 0.008612 |
| 17 | InternetService_DSL | 0.124214 |
| 18 | InternetService_Fiber optic | 0.308020 |
| 19 | InternetService_No | 0.227890 |
| 20 | Contract_Month-to-month | 0.405103 |
| 21 | Contract_One year | 0.177820 |
| 22 | Contract_Two year | 0.302253 |
| 23 | PaymentMethod_Bank transfer (automatic) | 0.117937 |
| 24 | PaymentMethod_Credit card (automatic) | 0.134302 |
| 25 | PaymentMethod_Electronic check | 0.301919 |
| 26 | PaymentMethod_Mailed check | 0.091683 |
sort_df = sort_df.sort_values(by=['values'])
# sort_df is sorted by the column 'values'
sort_df.reset_index(inplace = True)
to_remove_features = list(sort_df['index'][0 : 17])
to_remove_features
['gender_Male', 'PhoneService', 'MultipleLines', 'StreamingMovies', 'StreamingTV', 'DeviceProtection', 'OnlineBackup', 'PaymentMethod_Mailed check', 'PaymentMethod_Bank transfer (automatic)', 'InternetService_DSL', 'PaymentMethod_Credit card (automatic)', 'Partner', 'SeniorCitizen', 'Dependents', 'TechSupport', 'OnlineSecurity', 'Contract_One year']
These are the 17 least important features of the dataset
df = df.drop(to_remove_features, axis = 1)
df
| tenure | PaperlessBilling | MonthlyCharges | TotalCharges | Churn | InternetService_Fiber optic | InternetService_No | Contract_Month-to-month | Contract_Two year | PaymentMethod_Electronic check | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 29.85 | 29.85 | 0 | 0 | 0 | 1 | 0 | 1 |
| 1 | 34 | 0 | 56.95 | 1889.50 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 2 | 1 | 53.85 | 108.15 | 1 | 0 | 0 | 1 | 0 | 0 |
| 3 | 45 | 0 | 42.30 | 1840.75 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 2 | 1 | 70.70 | 151.65 | 1 | 1 | 0 | 1 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7038 | 24 | 1 | 84.80 | 1990.50 | 0 | 0 | 0 | 0 | 0 | 0 |
| 7039 | 72 | 1 | 103.20 | 7362.90 | 0 | 1 | 0 | 0 | 0 | 0 |
| 7040 | 11 | 1 | 29.60 | 346.45 | 0 | 0 | 0 | 1 | 0 | 1 |
| 7041 | 4 | 1 | 74.40 | 306.60 | 1 | 1 | 0 | 1 | 0 | 0 |
| 7042 | 66 | 1 | 105.65 | 6844.50 | 0 | 1 | 0 | 0 | 1 | 0 |
7043 rows × 10 columns
x_train = x_train.drop(to_remove_features, axis = 1)
x_test = x_test.drop(to_remove_features, axis = 1)
ANN_model_2 = Sequential()
# Adding Input Layer to ANN
ANN_model_2.add(Dense(units = 9, activation = 'relu'))
# Adding 1st Hidden Layer to the ANN
ANN_model_2.add(Dense(units = 7, activation = 'relu'))
#ANN_model_2.add(Dropout(0.3))
# Adding 2nd Hidden Layer to the ANN
ANN_model_2.add(Dense(units = 3, activation = 'relu'))
#ANN_model_2.add(Dropout(0.3))
# Adding Output Layer to the ANN
ANN_model_2.add(Dense(units = 1, activation = 'sigmoid'))
# Compiling the ANN model with required parameters
ANN_model_2.compile(optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
# Early Stopping is provided to avoid running too many epochs of no improvement.
early_stopping = tf.keras.callbacks.EarlyStopping(
monitor="accuracy",
min_delta=0.0001,
patience=10,
verbose=1,
mode="auto",
baseline=None,
restore_best_weights=True
)
model_history_2 = ANN_model_2.fit(x_train, y_train, batch_size = 10, epochs = 40, validation_split = 0.33, callbacks = early_stopping )
Epoch 1/40 378/378 [==============================] - 3s 6ms/step - loss: 0.7003 - accuracy: 0.7395 - val_loss: 0.6337 - val_accuracy: 0.7511 Epoch 2/40 378/378 [==============================] - 2s 4ms/step - loss: 0.6113 - accuracy: 0.7576 - val_loss: 0.5985 - val_accuracy: 0.7505 Epoch 3/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5821 - accuracy: 0.7546 - val_loss: 0.5769 - val_accuracy: 0.7505 Epoch 4/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5731 - accuracy: 0.7589 - val_loss: 0.5674 - val_accuracy: 0.7500 Epoch 5/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5555 - accuracy: 0.7578 - val_loss: 0.5559 - val_accuracy: 0.7516 Epoch 6/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5480 - accuracy: 0.7583 - val_loss: 0.5513 - val_accuracy: 0.7575 Epoch 7/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5446 - accuracy: 0.7570 - val_loss: 0.5348 - val_accuracy: 0.7624 Epoch 8/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5399 - accuracy: 0.7626 - val_loss: 0.5498 - val_accuracy: 0.7581 Epoch 9/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5275 - accuracy: 0.7692 - val_loss: 0.5290 - val_accuracy: 0.7710 Epoch 10/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5216 - accuracy: 0.7700 - val_loss: 0.5331 - val_accuracy: 0.7677 Epoch 11/40 378/378 [==============================] - 2s 7ms/step - loss: 0.5176 - accuracy: 0.7692 - val_loss: 0.5346 - val_accuracy: 0.7656 Epoch 12/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5181 - accuracy: 0.7684 - val_loss: 0.5259 - val_accuracy: 0.7677 Epoch 13/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5212 - accuracy: 0.7684 - val_loss: 0.5511 - val_accuracy: 0.7565 Epoch 14/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5199 - accuracy: 0.7713 - val_loss: 0.5154 - val_accuracy: 0.7737 Epoch 15/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5078 - accuracy: 0.7703 - val_loss: 0.5110 - val_accuracy: 0.7715 Epoch 16/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5102 - accuracy: 0.7689 - val_loss: 0.5569 - val_accuracy: 0.7511 Epoch 17/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5082 - accuracy: 0.7700 - val_loss: 0.5086 - val_accuracy: 0.7656 Epoch 18/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4993 - accuracy: 0.7777 - val_loss: 0.5314 - val_accuracy: 0.7597 Epoch 19/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4972 - accuracy: 0.7817 - val_loss: 0.5105 - val_accuracy: 0.7704 Epoch 20/40 378/378 [==============================] - 2s 6ms/step - loss: 0.5016 - accuracy: 0.7801 - val_loss: 0.5017 - val_accuracy: 0.7608 Epoch 21/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4932 - accuracy: 0.7795 - val_loss: 0.5147 - val_accuracy: 0.7704 Epoch 22/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4906 - accuracy: 0.7769 - val_loss: 0.4906 - val_accuracy: 0.7710 Epoch 23/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4828 - accuracy: 0.7782 - val_loss: 0.5322 - val_accuracy: 0.7613 Epoch 24/40 378/378 [==============================] - 2s 5ms/step - loss: 0.5045 - accuracy: 0.7750 - val_loss: 0.4999 - val_accuracy: 0.7758 Epoch 25/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4873 - accuracy: 0.7782 - val_loss: 0.4928 - val_accuracy: 0.7672 Epoch 26/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4802 - accuracy: 0.7864 - val_loss: 0.4907 - val_accuracy: 0.7753 Epoch 27/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4820 - accuracy: 0.7803 - val_loss: 0.4916 - val_accuracy: 0.7694 Epoch 28/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4745 - accuracy: 0.7888 - val_loss: 0.5212 - val_accuracy: 0.7683 Epoch 29/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4761 - accuracy: 0.7809 - val_loss: 0.4911 - val_accuracy: 0.7769 Epoch 30/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4822 - accuracy: 0.7843 - val_loss: 0.4918 - val_accuracy: 0.7758 Epoch 31/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4780 - accuracy: 0.7862 - val_loss: 0.4957 - val_accuracy: 0.7704 Epoch 32/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4761 - accuracy: 0.7870 - val_loss: 0.4826 - val_accuracy: 0.7720 Epoch 33/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4770 - accuracy: 0.7817 - val_loss: 0.4954 - val_accuracy: 0.7769 Epoch 34/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4741 - accuracy: 0.7867 - val_loss: 0.4839 - val_accuracy: 0.7790 Epoch 35/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4737 - accuracy: 0.7859 - val_loss: 0.4925 - val_accuracy: 0.7769 Epoch 36/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4756 - accuracy: 0.7795 - val_loss: 0.5081 - val_accuracy: 0.7753 Epoch 37/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4710 - accuracy: 0.7901 - val_loss: 0.4929 - val_accuracy: 0.7769 Epoch 38/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4756 - accuracy: 0.7843 - val_loss: 0.4877 - val_accuracy: 0.7715 Epoch 39/40 378/378 [==============================] - 2s 6ms/step - loss: 0.4709 - accuracy: 0.7893 - val_loss: 0.5081 - val_accuracy: 0.7763 Epoch 40/40 378/378 [==============================] - 2s 5ms/step - loss: 0.4719 - accuracy: 0.7809 - val_loss: 0.5024 - val_accuracy: 0.7758
fig_3 = go.Figure()
fig_3.add_trace(go.Scatter(x =np.arange(0,len(model_history_2.history['accuracy'])),
y = model_history_2.history['val_accuracy'],
mode='lines+markers',
name='val_accuracy'))
fig_3.add_trace(go.Scatter(x =np.arange(0,len(model_history_2.history['accuracy'])),
y = model_history_2.history['accuracy'],
mode='lines+markers',
name='Accuracy'))
fig_3.update_layout(title = 'ACCURACY vs VALIDATION_ACCURACY')
fig_3.update_xaxes(title_text="Epochs")
fig_3.update_yaxes(title_text="Accuracy")
fig_3.show()
fig_4 = go.Figure()
fig_4.add_trace(go.Scatter(x =np.arange(0,len(model_history_2.history['loss'])),
y = model_history_2.history['loss'],
mode='lines+markers',
name='loss'))
fig_4.add_trace(go.Scatter(x =np.arange(0,len(model_history_2.history['loss'])),
y = model_history_2.history['val_loss'],
mode='lines+markers',
name='val_loss'))
fig_4.update_layout(title = 'LOSS vs VALIDATION_LOSS')
fig_4.update_xaxes(title_text="Epochs")
fig_4.update_yaxes(title_text="Loss")
fig_4.show()
ANN_model_2.evaluate(x_test, y_test)
45/45 [==============================] - 0s 3ms/step - loss: 0.4547 - accuracy: 0.8006
[0.45469385385513306, 0.8005678057670593]
predict_2 = ANN_model_2.predict(x_test)
45/45 [==============================] - 0s 1ms/step
predict_2
array([[0.1171041 ],
[0.1171041 ],
[0.1171041 ],
...,
[0.51430327],
[0.64881563],
[0.67293304]], dtype=float32)
# Converting those predicted probabilities into Binary output
predict_new_2 = []
for x in predict_2:
if x >= 0.5:
predict_new_2.append(1)
else:
predict_new_2.append(0)
predict_new_2[-10 : ]
[0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
plx.imshow(confusion_matrix( y_test, predict_new_2), text_auto = True)
print(classification_report(y_test, predict_new_2))
precision recall f1-score support
0 0.82 0.94 0.88 1062
1 0.67 0.37 0.48 347
accuracy 0.80 1409
macro avg 0.75 0.66 0.68 1409
weighted avg 0.78 0.80 0.78 1409
import pickle as pkl
with open("Telco_customer_churn_prediction_model.pkl", "wb") as f:
pkl.dump(ANN_model_2, f)
INFO:tensorflow:Assets written to: ram://0fee0b00-391e-40a8-923a-4b64b302f98b/assets